# Boyoon Lee "The Impact of Educational Content on Anti-Immigrant Attitudes"
# 5. Online Appendix E (Table E.7, Figure E.6, Figure E.7)
# Last updated: 2022-11-16


# Initial settings --------------------------------------------------------

# Set directory
setwd("Your Path Here")

# Packages
library(ggplot2) 
library(miceadds) # for lm.cluster
library(tidyr)
library(dplyr)
library(descr)


# Load data  --------------------------------------------------------------
data <- read.csv("./cleaned_TSCS_did.csv", header=TRUE, sep=",")
load("./matched_data_Ver3_5_1.Rda") # Matched data


# Recode / Re-class  ------------------------------------------------------

### Make date variable using Birth year and Birth month
# Since there is no date of birth available, I set the date to the first day of the month
data$date <- as.Date(with(data, paste(birth_yr, birth_month, "01", sep="-")), "%Y-%m-%d")

### Subset
# Data for testing trends after 2001 (Figure E.6)
data2<-subset(data, data$school_yr<2010&data$school_yr>1967)

### Only leave school cohorts before 2002 
# In 2001, Knowing Taiwan series discontinued and replaced with the New Grade 1-9 curriculum (9 year curriculum)
# In addition, from 1968, junior vocational school shut down and expanded senior vocational school
data<-subset(data, data$school_yr<2002&data$school_yr>1967)


# Subsets for the Comparison Group ---------------------------------------

# Main comparison group 
yr13_22<-subset(data,school_yr>1987 & school_yr<2002)
yr13_22<-subset(yr13_22,edu_level>3) # those who graduated from at least junior high schools



##################################################
##    [APPENDIX E] Main Model Results           ##
##################################################


# Table E.7  -----------------------------------------------------------

### Pre-treatment Covariates
# Original data
did.cl.c1 <- lm.cluster(data= yr13_22, 
                        formula= num_imm ~ post + group + TG
                        + female 
                        + post*prts_gen 
                        + as.factor(loc_birth) + as.factor(school_yr) + as.factor(year), 
                        cluster = as.integer(yr13_22$school_yr)*as.integer(yr13_22$group_plus))
summary(did.cl.c1)
# Matched data
psm.cl.1 <- lm.cluster(data= dta_m,
                       formula= num_imm ~ post*group
                       + female 
                       + post*prts_gen 
                       + as.factor(loc_birth)+as.factor(school_yr) + as.factor(year),
                       cluster = dta_m$school_yr*dta_m$group_plus)
summary(psm.cl.1)


### Pre- and Post-treatment Covariates
# Original data
did.cl.c2 <- lm.cluster(data= yr13_22, 
                        formula= num_imm ~ post + group + TG
                        + female + married + employed + social_class + edu_uni
                        + post*prts_gen 
                        + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr) + as.factor(year), 
                        cluster = as.integer(yr13_22$school_yr)*as.integer(yr13_22$group_plus))
summary(did.cl.c2)
# Matched data
psm.cl.2 <- lm.cluster(data= dta_m,
                       formula= num_imm ~ post*group
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen 
                       + as.factor(loc_birth)+ as.factor(edu_level) +as.factor(school_yr) + as.factor(year),
                       cluster = dta_m$school_yr*dta_m$group_plus)
summary(psm.cl.2)


# Figure E.6  -----------------------------------------------------------

# Subset to include the school years after 2001 (up to 2009)
yr5_22<-subset(data2,school_yr>1987)
yr5_22<-subset(yr5_22,edu_level>3) # those who graduated from at least junior high schools

# Number of Immigrants (Dummy)
noNA.immdum.tw<-yr5_22[!is.na(yr5_22$num_imm_dummy)&!is.na(yr5_22$group),]
prop_immdum.tw<-noNA.immdum.tw %>% group_by(date,group,num_imm_dummy) %>% summarise (n=n()) %>% mutate(proportion=n/sum(n))
prop_immd_tw<-subset(prop_immdum.tw,prop_immdum.tw$num_imm_dummy==1)
# Rolling average
rolling <- prop_immd_tw %>%
  dplyr::arrange(desc(group)) %>% 
  dplyr::group_by(group) %>% 
  dplyr::mutate(prop_03 = zoo::rollmean(proportion, k = 3, fill = NA),
                prop_06 = zoo::rollmean(proportion, k = 6, fill = NA),
                prop_12 = zoo::rollmean(proportion, k = 12, fill = NA)) %>% 
  dplyr::ungroup()

# Subset by educational path
voc<-subset(rolling, rolling$group==0)
gen<-subset(rolling, rolling$group==1)

# Plot
yr<-c(1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007)
dates<-c(as.Date("1976-09-01"),voc$date[grep("-09-01",voc$date)][1:9],as.Date("1986-09-01"),
         voc$date[grep("-09-01",voc$date)][10],as.Date("1988-09-01"),voc$date[grep("-09-01",voc$date)][11],
         as.Date("1990-09-01"),voc$date[grep("-09-01",voc$date)][12:14],as.Date("1994-09-01"))
par(mar=c(7, 5, 1, 1.5) + 1)
plot(voc$date[1:145],voc$prop_12[1:145], ylim=range(voc$prop_12,gen$prop_12,na.rm=T),main = "",
     ylab = "Proportion of Preference for \nIncreased Immigrants",
     pch=20, col="grey60",cex.lab=1.5,cex.axis=1.2, xaxt="n",xlab="",cex=1.5)
axis(1,dates,labels=yr, cex.axis=1.2,mgp=c(2,1,0),las=2)
lines(voc$date, voc$prop_12,col="grey60", lty=2)
points(gen$date, gen$prop_12, pch=16, col="black", cex=1)
lines(gen$date, gen$prop_12,col="black")
mtext("Year Entered into Junior High School",side=1,line=4.1,cex=1.5)
abline(v=as.numeric(as.Date("1984-09-01")),lty=2,col="red",xpd=FALSE)
abline(v=as.numeric(as.Date("1988-09-01")),lty=2,col="red",xpd=FALSE)
par(xpd=TRUE)
# Add a box to show phase-in period
rect(as.numeric(as.Date("1984-09-01")),0.13,as.numeric(as.Date("1988-09-01")),0.54, border = NA, col= '#00000011')
legend(as.numeric(as.Date("1976-09-01")), 0.035, legend=c("Vocational path","Academic path"),
       col=c("grey60","black"), lwd=2, lty=c(2,1), pch=c(20,16), cex=1.5, box.lty=0,horiz = TRUE)



# Figure E.7  -----------------------------------------------------------

# Estimate the linear model before the introduction of Renshi Taiwan textbooks
before<-yr13_22[yr13_22$post==0,]
did.lmc<-lm.cluster(num_imm ~ group*school_yr
                    + female + married + employed + social_class + edu_uni 
                    +  school_yr*prts_gen
                    + as.factor(school_yr) + as.factor(year) + as.factor(loc_birth) + as.factor(edu_level)
                    , cluster=before$school_yr*before$group_plus, data=before)
summary(did.lmc)

# Calculate fitted values
fit<-did.lmc$lm_res$model
fit$fitted<-did.lmc$lm_res$fitted.values
fit.gen<-subset(fit,fit$group==1)
fit.gen2<-subset(fit.gen,fit.gen$fitted<=4.99)
fit.voc<-subset(fit,fit$group==0)

# Plot parallel trends
par(mar=c(7, 5, 1, 1.5) + 1)
plot(jitter(fit$school_yr,1.7),fit$fitted,
     col=ifelse(fit$group=="1","black","grey80"),
     pch=ifelse(fit$group=="1",19,19),
     #ylim=c(1.4,3.9),
     xlab = " ",
     ylab = "Fitted value of Preference for \n Increased Immigrants",
     cex.lab=1.5,cex.axis=1.2, xaxt="n",main="")
abline(lm(fit.gen$fitted ~ fit.gen$school_yr), col="black", lwd=2, lty=1)
abline(lm(fit.gen2$fitted ~ fit.gen2$school_yr), col="black", lwd=2, lty=2) # fitted line except the extreme value (5.0)
abline(lm(fit.voc$fitted ~ fit.voc$school_yr), col="grey60", lwd=2)
axis(1,fit$school_yr,labels=fit$school_yr, cex.axis=1.2,mgp=c(2,1,0),las=2)
mtext("Year Entered into Junior High School",side=1,line=4.1,cex=1.5)
par(xpd=TRUE)
legend(1988.5,0.4,pch=c(19,19),col=c("grey60","black"),lwd=2,
       c("Vocational path","Academic path"),cex=1.5,box.lty=0,horiz = TRUE)

